Code to generate state vectors
library(rstan)
options(mc.cores=4)
rstan_options("auto_write" = TRUE)
library(tidyverse)
select_new_state <- function(transition_probs) {
draw <- rmultinom(1, 1, transition_probs)
which(draw==1)
}
simulate_hmm_states <- function(n_steps, state_initial, m_transition_probs) {
states <- vector(length = n_steps)
state <- state_initial
for(i in 1:n_steps) {
transition_probs <- m_transition_probs[state, ]
state <- select_new_state(transition_probs)
states[i] <- state
}
states
}
optimise_repeat <- function(n_opt, model, data_stan) {
best_val <- -Inf
for(i in 1:n_opt) {
fit <- optimizing(model, data=data_stan, as_vector=FALSE)
val <- fit$value
if(val > best_val) {
best_val <- val
fit_best <- fit
}
}
fit_best
}
Generate observations from a (1 - w) * normal + w * student_t
r_student_t <- function(mu, sigma, df) {
mu + sigma * rt(n = 1, df = df) #/ sqrt(df / (df - 2))
}
tainted_normal <- function(w, mu, sigma, df) {
u <- runif(1)
if(u < w)
x <- r_student_t(mu, sigma, df)
else
x <- rnorm(1, mu, sigma)
x
}
generate_emissions <- function(states, w, mus, sigmas, dfs) {
xs <- vector(length = length(states))
for(i in seq_along(xs))
xs[i] <- tainted_normal(
w, mus[states[i]], sigmas[states[i]], dfs[states[i]])
xs
}
simulate_hmm <- function(n_steps,
state_initial,
m_transition_probs,
w, mus, sigmas, dfs) {
states <- simulate_hmm_states(n_steps, state_initial, m_transition_probs)
x <- generate_emissions(states, w, mus, sigmas, dfs)
df <- tibble(state=states, obs=x) %>%
mutate(time=seq_along(state)) %>%
mutate(state=as.factor(state))
df
}
mus <- c(1, 8)
sigmas <- c(1, 1)
dfs <- c(3, 3)
K <- 2
m_transition_probs <- matrix(c(0.9, 0.1, 0.1, 0.9), ncol = 2)
n_steps <- 1000
df <- simulate_hmm(n_steps, 1, m_transition_probs, 0, mus, sigmas, dfs)
df %>%
ggplot(aes(x=time, y=obs)) +
geom_line() +
geom_point(aes(colour=state))
g <- df %>%
ggplot(aes(x=time, y=obs)) +
geom_line() +
geom_point() +
xlab("Time") +
ylab(latex2exp::TeX("$\\Y_t"))
ggsave("../figures/simulated_normal.pdf", g, width = 6,
height = 3)
Fit models
model_normal <- stan_model("hmm.stan")
model_student_t <- stan_model("hmm_student_t.stan")
model_robust <- stan_model("hmm_beta_divergence.stan")
data_stan <- list(
N=nrow(df),
dist=df$obs,
K=2,
beta=0.1
)
n_opt <- 2
fit_normal <- optimise_repeat(n_opt, model_normal, data=data_stan)
fit_student_t <- optimise_repeat(n_opt, model_student_t, data=data_stan)
Warning in .local(object, ...) : non-zero return code in optimizing
fit_robust <- optimise_repeat(n_opt, model_robust, data=data_stan)
df <- df %>%
mutate(normal=fit_normal$par$state,
student_t=fit_student_t$par$state,
robust=fit_robust$par$state) %>%
mutate(normal=as.factor(normal),
student_t=as.factor(student_t),
robust=as.factor(robust))
df %>%
pivot_longer(-c("time", "obs")) %>%
ggplot(aes(x=time, y=obs)) +
geom_line() +
geom_point(aes(colour=value)) +
facet_wrap(~name)
# some issue with algo for t=1
df$normal[1] <- 1
g <- df %>%
ggplot(aes(x=time, y=obs)) +
geom_line() +
geom_point(aes(colour=normal)) +
xlab("Time") +
ylab(latex2exp::TeX("$\\Y_t")) +
scale_color_brewer("State", palette = "Dark2")
ggsave("../figures/simulated_normal_state.pdf", g, width = 6,
height = 3)
Now corrupt data
data_stan <- list(
N=nrow(df),
dist=df$obs,
K=2,
beta=0.1
)
n_opt <- 2
fit_normal <- optimise_repeat(n_opt, model_normal, data=data_stan)
fit_student_t <- optimise_repeat(n_opt, model_student_t, data=data_stan)
fit_robust <- optimise_repeat(n_opt, model_robust, data=data_stan)
df <- df %>%
mutate(normal=fit_normal$par$state,
student_t=fit_student_t$par$state,
robust=fit_robust$par$state) %>%
mutate(normal=as.factor(normal),
student_t=as.factor(student_t),
robust=as.factor(robust))
# some issue with algo for t=1
df$normal[1] <- 1
g <- df %>%
ggplot(aes(x=time, y=obs)) +
geom_line() +
geom_point(aes(colour=normal)) +
xlab("Time") +
ylab(latex2exp::TeX("$\\Y_t")) +
scale_color_brewer("State", palette = "Dark2")
ggsave("../figures/simulated_corrupt_normal_state.pdf", g, width = 6,
height = 3)
With 3 states
data_stan <- list(
N=nrow(df),
dist=df$obs,
K=3,
beta=0.1
)
n_opt <- 2
fit_normal <- optimise_repeat(n_opt, model_normal, data=data_stan)
df <- df %>%
mutate(normal=fit_normal$par$state,
student_t=fit_student_t$par$state,
robust=fit_robust$par$state) %>%
mutate(normal=as.factor(normal),
student_t=as.factor(student_t),
robust=as.factor(robust))
# some issue with algo for t=1
df$normal[1] <- 1
g <- df %>%
ggplot(aes(x=time, y=obs)) +
geom_line() +
geom_point(aes(colour=normal)) +
xlab("Time") +
ylab(latex2exp::TeX("$\\Y_t")) +
scale_color_brewer("State", palette = "Dark2")
g
ggsave("../figures/simulated_corrupt_normal_state_3.pdf", g, width = 6,
height = 3)
How many states?
find_optimal_k <- function(n_opt, base_parameters, model, data_stan, ks=2:4) {
bics <- vector(length = length(ks))
for(i in seq_along(bics)){
K <- ks[i]
data_stan$K <- K
fit <- optimise_repeat(n_opt, model, data=data_stan)
log_like <- fit$par$log_p;
num_parameters <- K * base_parameters + K * (K - 1)
bics[i] <- 2 * log_like - num_parameters * log(data_stan$N)
}
bics
}
ks <- seq(2, 4, 1)
bics <- find_optimal_k(5, 3, model_normal, data_stan, ks=ks)
plot(ks, bics)
Look at purity of state estimation
mean(df$state==df$normal)
[1] 0.0409
mean(df$state==df$robust)
[1] 0.9991
mean(df$state==df$student_t)
[1] 0.999
Try fitting a DP to fake HMM data
dfs <- c(3, 3)
df <- simulate_hmm(n_steps, 1, m_transition_probs, 0.2, mus, sigmas, dfs)
df %>%
ggplot(aes(x=time, y=obs)) +
geom_line() +
geom_point(aes(colour=state))
dp <- DirichletProcessGaussian(scale(df$obs), g0Priors = c(0, 1, 2, 1),
alphaPriors = c(0.1, 4))
n_iter <- 400
dp_1 <- Fit(dp, n_iter)
|
| | 0%
|
| | 1%
|
|- | 1%
|
|- | 2%
|
|- | 3%
|
|-- | 3%
|
|-- | 4%
|
|-- | 5%
|
|--- | 5%
|
|--- | 6%
|
|--- | 7%
|
|---- | 7%
|
|---- | 8%
|
|---- | 9%
|
|----- | 9%
|
|----- | 10%
|
|----- | 11%
|
|------ | 11%
|
|------ | 12%
|
|------ | 13%
|
|------- | 13%
|
|------- | 14%
|
|------- | 15%
|
|-------- | 15%
|
|-------- | 16%
|
|-------- | 17%
|
|--------- | 17%
|
|--------- | 18%
|
|--------- | 19%
|
|---------- | 19%
|
|---------- | 20%
|
|---------- | 21%
|
|----------- | 21%
|
|----------- | 22%
|
|----------- | 23%
|
|------------ | 23%
|
|------------ | 24%
|
|------------ | 25%
|
|------------- | 25%
|
|------------- | 26%
|
|------------- | 27%
|
|-------------- | 27%
|
|-------------- | 28%
|
|-------------- | 29%
|
|--------------- | 29%
|
|--------------- | 30%
|
|--------------- | 31%
|
|---------------- | 31%
|
|---------------- | 32%
|
|---------------- | 33%
|
|----------------- | 33%
|
|----------------- | 34%
|
|----------------- | 35%
|
|------------------ | 35%
|
|------------------ | 36%
|
|------------------ | 37%
|
|------------------- | 37%
|
|------------------- | 38%
|
|------------------- | 39%
|
|-------------------- | 39%
|
|-------------------- | 40%
|
|-------------------- | 41%
|
|--------------------- | 41%
|
|--------------------- | 42%
|
|--------------------- | 43%
|
|---------------------- | 43%
|
|---------------------- | 44%
|
|---------------------- | 45%
|
|----------------------- | 45%
|
|----------------------- | 46%
|
|----------------------- | 47%
|
|------------------------ | 47%
|
|------------------------ | 48%
|
|------------------------ | 49%
|
|------------------------- | 49%
|
|------------------------- | 50%
|
|------------------------- | 51%
|
|-------------------------- | 51%
|
|-------------------------- | 52%
|
|-------------------------- | 53%
|
|--------------------------- | 53%
|
|--------------------------- | 54%
|
|--------------------------- | 55%
|
|---------------------------- | 55%
|
|---------------------------- | 56%
|
|---------------------------- | 57%
|
|----------------------------- | 57%
|
|----------------------------- | 58%
|
|----------------------------- | 59%
|
|------------------------------ | 59%
|
|------------------------------ | 60%
|
|------------------------------ | 61%
|
|------------------------------- | 61%
|
|------------------------------- | 62%
|
|------------------------------- | 63%
|
|-------------------------------- | 63%
|
|-------------------------------- | 64%
|
|-------------------------------- | 65%
|
|--------------------------------- | 65%
|
|--------------------------------- | 66%
|
|--------------------------------- | 67%
|
|---------------------------------- | 67%
|
|---------------------------------- | 68%
|
|---------------------------------- | 69%
|
|----------------------------------- | 69%
|
|----------------------------------- | 70%
|
|----------------------------------- | 71%
|
|------------------------------------ | 71%
|
|------------------------------------ | 72%
|
|------------------------------------ | 73%
|
|------------------------------------- | 73%
|
|------------------------------------- | 74%
|
|------------------------------------- | 75%
|
|-------------------------------------- | 75%
|
|-------------------------------------- | 76%
|
|-------------------------------------- | 77%
|
|--------------------------------------- | 77%
|
|--------------------------------------- | 78%
|
|--------------------------------------- | 79%
|
|---------------------------------------- | 79%
|
|---------------------------------------- | 80%
|
|---------------------------------------- | 81%
|
|----------------------------------------- | 81%
|
|----------------------------------------- | 82%
|
|----------------------------------------- | 83%
|
|------------------------------------------ | 83%
|
|------------------------------------------ | 84%
|
|------------------------------------------ | 85%
|
|------------------------------------------- | 85%
|
|------------------------------------------- | 86%
|
|------------------------------------------- | 87%
|
|-------------------------------------------- | 87%
|
|-------------------------------------------- | 88%
|
|-------------------------------------------- | 89%
|
|--------------------------------------------- | 89%
|
|--------------------------------------------- | 90%
|
|--------------------------------------------- | 91%
|
|---------------------------------------------- | 91%
|
|---------------------------------------------- | 92%
|
|---------------------------------------------- | 93%
|
|----------------------------------------------- | 93%
|
|----------------------------------------------- | 94%
|
|----------------------------------------------- | 95%
|
|------------------------------------------------ | 95%
|
|------------------------------------------------ | 96%
|
|------------------------------------------------ | 97%
|
|------------------------------------------------- | 97%
|
|------------------------------------------------- | 98%
|
|------------------------------------------------- | 99%
|
|--------------------------------------------------| 99%
|
|--------------------------------------------------| 100%
dp_2 <- Fit(dp, n_iter)
|
| | 0%
|
| | 1%
|
|- | 1%
|
|- | 2%
|
|- | 3%
|
|-- | 3%
|
|-- | 4%
|
|-- | 5%
|
|--- | 5%
|
|--- | 6%
|
|--- | 7%
|
|---- | 7%
|
|---- | 8%
|
|---- | 9%
|
|----- | 9%
|
|----- | 10%
|
|----- | 11%
|
|------ | 11%
|
|------ | 12%
|
|------ | 13%
|
|------- | 13%
|
|------- | 14%
|
|------- | 15%
|
|-------- | 15%
|
|-------- | 16%
|
|-------- | 17%
|
|--------- | 17%
|
|--------- | 18%
|
|--------- | 19%
|
|---------- | 19%
|
|---------- | 20%
|
|---------- | 21%
|
|----------- | 21%
|
|----------- | 22%
|
|----------- | 23%
|
|------------ | 23%
|
|------------ | 24%
|
|------------ | 25%
|
|------------- | 25%
|
|------------- | 26%
|
|------------- | 27%
|
|-------------- | 27%
|
|-------------- | 28%
|
|-------------- | 29%
|
|--------------- | 29%
|
|--------------- | 30%
|
|--------------- | 31%
|
|---------------- | 31%
|
|---------------- | 32%
|
|---------------- | 33%
|
|----------------- | 33%
|
|----------------- | 34%
|
|----------------- | 35%
|
|------------------ | 35%
|
|------------------ | 36%
|
|------------------ | 37%
|
|------------------- | 37%
|
|------------------- | 38%
|
|------------------- | 39%
|
|-------------------- | 39%
|
|-------------------- | 40%
|
|-------------------- | 41%
|
|--------------------- | 41%
|
|--------------------- | 42%
|
|--------------------- | 43%
|
|---------------------- | 43%
|
|---------------------- | 44%
|
|---------------------- | 45%
|
|----------------------- | 45%
|
|----------------------- | 46%
|
|----------------------- | 47%
|
|------------------------ | 47%
|
|------------------------ | 48%
|
|------------------------ | 49%
|
|------------------------- | 49%
|
|------------------------- | 50%
|
|------------------------- | 51%
|
|-------------------------- | 51%
|
|-------------------------- | 52%
|
|-------------------------- | 53%
|
|--------------------------- | 53%
|
|--------------------------- | 54%
|
|--------------------------- | 55%
|
|---------------------------- | 55%
|
|---------------------------- | 56%
|
|---------------------------- | 57%
|
|----------------------------- | 57%
|
|----------------------------- | 58%
|
|----------------------------- | 59%
|
|------------------------------ | 59%
|
|------------------------------ | 60%
|
|------------------------------ | 61%
|
|------------------------------- | 61%
|
|------------------------------- | 62%
|
|------------------------------- | 63%
|
|-------------------------------- | 63%
|
|-------------------------------- | 64%
|
|-------------------------------- | 65%
|
|--------------------------------- | 65%
|
|--------------------------------- | 66%
|
|--------------------------------- | 67%
|
|---------------------------------- | 67%
|
|---------------------------------- | 68%
|
|---------------------------------- | 69%
|
|----------------------------------- | 69%
|
|----------------------------------- | 70%
|
|----------------------------------- | 71%
|
|------------------------------------ | 71%
|
|------------------------------------ | 72%
|
|------------------------------------ | 73%
|
|------------------------------------- | 73%
|
|------------------------------------- | 74%
|
|------------------------------------- | 75%
|
|-------------------------------------- | 75%
|
|-------------------------------------- | 76%
|
|-------------------------------------- | 77%
|
|--------------------------------------- | 77%
|
|--------------------------------------- | 78%
|
|--------------------------------------- | 79%
|
|---------------------------------------- | 79%
|
|---------------------------------------- | 80%
|
|---------------------------------------- | 81%
|
|----------------------------------------- | 81%
|
|----------------------------------------- | 82%
|
|----------------------------------------- | 83%
|
|------------------------------------------ | 83%
|
|------------------------------------------ | 84%
|
|------------------------------------------ | 85%
|
|------------------------------------------- | 85%
|
|------------------------------------------- | 86%
|
|------------------------------------------- | 87%
|
|-------------------------------------------- | 87%
|
|-------------------------------------------- | 88%
|
|-------------------------------------------- | 89%
|
|--------------------------------------------- | 89%
|
|--------------------------------------------- | 90%
|
|--------------------------------------------- | 91%
|
|---------------------------------------------- | 91%
|
|---------------------------------------------- | 92%
|
|---------------------------------------------- | 93%
|
|----------------------------------------------- | 93%
|
|----------------------------------------------- | 94%
|
|----------------------------------------------- | 95%
|
|------------------------------------------------ | 95%
|
|------------------------------------------------ | 96%
|
|------------------------------------------------ | 97%
|
|------------------------------------------------- | 97%
|
|------------------------------------------------- | 98%
|
|------------------------------------------------- | 99%
|
|--------------------------------------------------| 99%
|
|--------------------------------------------------| 100%
n_clusters_1 <- map_dbl(dp_1$labelsChain, ~max(.))
n_clusters_2 <- map_dbl(dp_2$labelsChain, ~max(.))
tibble(n1=n_clusters_1, n2=n_clusters_2) %>%
mutate(iter=seq_along(n1)) %>%
pivot_longer(-iter) %>%
ggplot(aes(x=iter, y=value, colour=name)) +
geom_line()
dp_1$numberClusters
[1] 7
dp_2$numberClusters
[1] 1
dp_1$alpha
[1] 0.455211
df %>%
mutate(cluster=dp_2$clusterLabels) %>%
ggplot(aes(x=time, y=obs)) +
geom_line() +
geom_point(aes(colour=cluster))
Try tempering the likelihood
Likelihood.tempered <- function(mdobj, x, theta){
beta <- 1
val1 <- 1 / beta * dnorm(x, theta[[1]], theta[[2]])^beta
# val2 <- (1 / (beta + 1)) * integrate(function(x) dnorm(x, theta[[1]], theta[[2]])^(beta + 1), -20, 20)$value
as.numeric(val1)
}
PriorDraw.tempered <- function(mdobj, n=1){
theta <- list()
theta[[1]] = array(rnorm(n, mdobj$priorParameters[1], mdobj$priorParameters[2]), dim=c(1, 1, n))
theta[[2]] = array(rexp(n, mdobj$priorParameters[3]), dim=c(1, 1, n))
theta
}
PriorDensity.tempered <- function(mdobj, theta){
priorParameters <- mdobj$priorParameters
thetaDensity <- dnorm(theta[[1]], priorParameters[1], priorParameters[2])
thetaDensity <- thetaDensity * dexp(theta[[2]], priorParameters[3])
as.numeric(thetaDensity)
}
MhParameterProposal.tempered <- function(mdobj, oldParams){
mhStepSize <- mdobj$mhStepSize
newParams <- oldParams
newParams[[1]] <- oldParams[[1]] + mhStepSize[1]*rnorm(1)
newParams[[2]] <- abs(oldParams[[2]] + mhStepSize[2]*rnorm(1))
newParams
}
temperedMd <- MixingDistribution(distribution = "tempered",
priorParameters = c(0, 5, 0.01),
conjugate = "nonconjugate",
mhStepSize = c(1, 0.1)
)
dp <- DirichletProcessCreate(scale(df$obs), temperedMd)
dp <- Initialise(dp, numInitialClusters = 3)
Accept Ratio: 0.695
dp <- Fit(dp, 200)
|
| | 0%
|
| | 1%
|
|- | 2%
|
|-- | 3%
|
|-- | 4%
|
|-- | 5%
|
|--- | 6%
|
|---- | 7%
|
|---- | 8%
|
|---- | 9%
|
|----- | 10%
|
|------ | 11%
|
|------ | 12%
|
|------ | 13%
|
|------- | 14%
|
|-------- | 15%
|
|-------- | 16%
|
|-------- | 17%
|
|--------- | 18%
|
|---------- | 19%
|
|---------- | 20%
|
|---------- | 21%
|
|----------- | 22%
|
|------------ | 23%
|
|------------ | 24%
|
|------------ | 25%
|
|------------- | 26%
|
|-------------- | 27%
|
|-------------- | 28%
|
|-------------- | 29%
|
|--------------- | 30%
|
|---------------- | 31%
|
|---------------- | 32%
|
|---------------- | 33%
|
|----------------- | 34%
|
|------------------ | 35%
|
|------------------ | 36%
|
|------------------ | 37%
|
|------------------- | 38%
|
|-------------------- | 39%
|
|-------------------- | 40%
|
|-------------------- | 41%
|
|--------------------- | 42%
|
|---------------------- | 43%
|
|---------------------- | 44%
|
|---------------------- | 45%
|
|----------------------- | 46%
|
|------------------------ | 47%
|
|------------------------ | 48%
|
|------------------------ | 49%
|
|------------------------- | 50%
|
|-------------------------- | 51%
|
|-------------------------- | 52%
|
|-------------------------- | 53%
|
|--------------------------- | 54%
|
|---------------------------- | 55%
|
|---------------------------- | 56%
|
|---------------------------- | 57%
|
|----------------------------- | 58%
|
|------------------------------ | 59%
|
|------------------------------ | 60%
|
|------------------------------ | 61%
|
|------------------------------- | 62%
|
|-------------------------------- | 63%
|
|-------------------------------- | 64%
|
|-------------------------------- | 65%
|
|--------------------------------- | 66%
|
|---------------------------------- | 67%
|
|---------------------------------- | 68%
|
|---------------------------------- | 69%
|
|----------------------------------- | 70%
|
|------------------------------------ | 71%
|
|------------------------------------ | 72%
|
|------------------------------------ | 73%
|
|------------------------------------- | 74%
|
|-------------------------------------- | 75%
|
|-------------------------------------- | 76%
|
|-------------------------------------- | 77%
|
|--------------------------------------- | 78%
|
|---------------------------------------- | 79%
|
|---------------------------------------- | 80%
|
|---------------------------------------- | 81%
|
|----------------------------------------- | 82%
|
|------------------------------------------ | 83%
|
|------------------------------------------ | 84%
|
|------------------------------------------ | 85%
|
|------------------------------------------- | 86%
|
|-------------------------------------------- | 87%
|
|-------------------------------------------- | 88%
|
|-------------------------------------------- | 89%
|
|--------------------------------------------- | 90%
|
|---------------------------------------------- | 91%
|
|---------------------------------------------- | 92%
|
|---------------------------------------------- | 93%
|
|----------------------------------------------- | 94%
|
|------------------------------------------------ | 95%
|
|------------------------------------------------ | 96%
|
|------------------------------------------------ | 97%
|
|------------------------------------------------- | 98%
|
|--------------------------------------------------| 99%
|
|--------------------------------------------------| 100%
df %>%
mutate(cluster=dp$clusterLabels) %>%
ggplot(aes(x=time, y=obs)) +
geom_line() +
geom_point(aes(colour=cluster))
Try fitting DP to fake 2 mean data
Fit DP to data
dp_1$alpha
[1] 0.346678
Try with more angle data too
df_s <- df %>%
filter(id==1) %>%
slice(1:200) %>%
select(Dist, Angle) %>%
scale()
df_s %>%
as.data.frame() %>%
mutate(time=seq_along(Dist)) %>%
pivot_longer(-time) %>%
ggplot(aes(x=time, y=value)) +
geom_line() +
facet_wrap(~name)
dp_1 <- DirichletProcessMvnormal(df_s, numInitialClusters = 2, alphaPriors = c(0.001, 4))
dp_2 <- DirichletProcessMvnormal(df_s, numInitialClusters = 2, alphaPriors = c(0.001, 4))
n_iter <- 200
dp_1 <- Fit(dp_1, n_iter)
|
| | 0%
|
| | 1%
|
|- | 2%
|
|-- | 3%
|
|-- | 4%
|
|-- | 5%
|
|--- | 6%
|
|---- | 7%
|
|---- | 8%
|
|---- | 9%
|
|----- | 10%
|
|------ | 11%
|
|------ | 12%
|
|------ | 13%
|
|------- | 14%
|
|-------- | 15%
|
|-------- | 16%
|
|-------- | 17%
|
|--------- | 18%
|
|---------- | 19%
|
|---------- | 20%
|
|---------- | 21%
|
|----------- | 22%
|
|------------ | 23%
|
|------------ | 24%
|
|------------ | 25%
|
|------------- | 26%
|
|-------------- | 27%
|
|-------------- | 28%
|
|-------------- | 29%
|
|--------------- | 30%
|
|---------------- | 31%
|
|---------------- | 32%
|
|---------------- | 33%
|
|----------------- | 34%
|
|------------------ | 35%
|
|------------------ | 36%
|
|------------------ | 37%
|
|------------------- | 38%
|
|-------------------- | 39%
|
|-------------------- | 40%
|
|-------------------- | 41%
|
|--------------------- | 42%
|
|---------------------- | 43%
|
|---------------------- | 44%
|
|---------------------- | 45%
|
|----------------------- | 46%
|
|------------------------ | 47%
|
|------------------------ | 48%
|
|------------------------ | 49%
|
|------------------------- | 50%
|
|-------------------------- | 51%
|
|-------------------------- | 52%
|
|-------------------------- | 53%
|
|--------------------------- | 54%
|
|---------------------------- | 55%
|
|---------------------------- | 56%
|
|---------------------------- | 57%
|
|----------------------------- | 58%
|
|------------------------------ | 59%
|
|------------------------------ | 60%
|
|------------------------------ | 61%
|
|------------------------------- | 62%
|
|-------------------------------- | 63%
|
|-------------------------------- | 64%
|
|-------------------------------- | 65%
|
|--------------------------------- | 66%
|
|---------------------------------- | 67%
|
|---------------------------------- | 68%
|
|---------------------------------- | 69%
|
|----------------------------------- | 70%
|
|------------------------------------ | 71%
|
|------------------------------------ | 72%
|
|------------------------------------ | 73%
|
|------------------------------------- | 74%
|
|-------------------------------------- | 75%
|
|-------------------------------------- | 76%
|
|-------------------------------------- | 77%
|
|--------------------------------------- | 78%
|
|---------------------------------------- | 79%
|
|---------------------------------------- | 80%
|
|---------------------------------------- | 81%
|
|----------------------------------------- | 82%
|
|------------------------------------------ | 83%
|
|------------------------------------------ | 84%
|
|------------------------------------------ | 85%
|
|------------------------------------------- | 86%
|
|-------------------------------------------- | 87%
|
|-------------------------------------------- | 88%
|
|-------------------------------------------- | 89%
|
|--------------------------------------------- | 90%
|
|---------------------------------------------- | 91%
|
|---------------------------------------------- | 92%
|
|---------------------------------------------- | 93%
|
|----------------------------------------------- | 94%
|
|------------------------------------------------ | 95%
|
|------------------------------------------------ | 96%
|
|------------------------------------------------ | 97%
|
|------------------------------------------------- | 98%
|
|--------------------------------------------------| 99%
|
|--------------------------------------------------| 100%
dp_2 <- Fit(dp_2, n_iter)
|
| | 0%
|
| | 1%
|
|- | 2%
|
|-- | 3%
|
|-- | 4%
|
|-- | 5%
|
|--- | 6%
|
|---- | 7%
|
|---- | 8%
|
|---- | 9%
|
|----- | 10%
|
|------ | 11%
|
|------ | 12%
|
|------ | 13%
|
|------- | 14%
|
|-------- | 15%
|
|-------- | 16%
|
|-------- | 17%
|
|--------- | 18%
|
|---------- | 19%
|
|---------- | 20%
|
|---------- | 21%
|
|----------- | 22%
|
|------------ | 23%
|
|------------ | 24%
|
|------------ | 25%
|
|------------- | 26%
|
|-------------- | 27%
|
|-------------- | 28%
|
|-------------- | 29%
|
|--------------- | 30%
|
|---------------- | 31%
|
|---------------- | 32%
|
|---------------- | 33%
|
|----------------- | 34%
|
|------------------ | 35%
|
|------------------ | 36%
|
|------------------ | 37%
|
|------------------- | 38%
|
|-------------------- | 39%
|
|-------------------- | 40%
|
|-------------------- | 41%
|
|--------------------- | 42%
|
|---------------------- | 43%
|
|---------------------- | 44%
|
|---------------------- | 45%
|
|----------------------- | 46%
|
|------------------------ | 47%
|
|------------------------ | 48%
|
|------------------------ | 49%
|
|------------------------- | 50%
|
|-------------------------- | 51%
|
|-------------------------- | 52%
|
|-------------------------- | 53%
|
|--------------------------- | 54%
|
|---------------------------- | 55%
|
|---------------------------- | 56%
|
|---------------------------- | 57%
|
|----------------------------- | 58%
|
|------------------------------ | 59%
|
|------------------------------ | 60%
|
|------------------------------ | 61%
|
|------------------------------- | 62%
|
|-------------------------------- | 63%
|
|-------------------------------- | 64%
|
|-------------------------------- | 65%
|
|--------------------------------- | 66%
|
|---------------------------------- | 67%
|
|---------------------------------- | 68%
|
|---------------------------------- | 69%
|
|----------------------------------- | 70%
|
|------------------------------------ | 71%
|
|------------------------------------ | 72%
|
|------------------------------------ | 73%
|
|------------------------------------- | 74%
|
|-------------------------------------- | 75%
|
|-------------------------------------- | 76%
|
|-------------------------------------- | 77%
|
|--------------------------------------- | 78%
|
|---------------------------------------- | 79%
|
|---------------------------------------- | 80%
|
|---------------------------------------- | 81%
|
|----------------------------------------- | 82%
|
|------------------------------------------ | 83%
|
|------------------------------------------ | 84%
|
|------------------------------------------ | 85%
|
|------------------------------------------- | 86%
|
|-------------------------------------------- | 87%
|
|-------------------------------------------- | 88%
|
|-------------------------------------------- | 89%
|
|--------------------------------------------- | 90%
|
|---------------------------------------------- | 91%
|
|---------------------------------------------- | 92%
|
|---------------------------------------------- | 93%
|
|----------------------------------------------- | 94%
|
|------------------------------------------------ | 95%
|
|------------------------------------------------ | 96%
|
|------------------------------------------------ | 97%
|
|------------------------------------------------- | 98%
|
|--------------------------------------------------| 99%
|
|--------------------------------------------------| 100%
n_clusters_1 <- map_dbl(dp_1$labelsChain, ~max(.))
n_clusters_2 <- map_dbl(dp_2$labelsChain, ~max(.))
tibble(n1=n_clusters_1, n2=n_clusters_2) %>%
mutate(iter=seq_along(n1)) %>%
pivot_longer(-iter) %>%
ggplot(aes(x=iter, y=value, colour=name)) +
geom_line()
Look at clusters
x <- read.csv("../data/hmm_test.csv", header = FALSE)
x <- x %>%
slice(1:nrow(x))
dp_1 <- DirichletProcessGaussian(
scale(x$V2), alphaPriors = c(4, 4))
dp_2 <- DirichletProcessGaussian(
scale(x$V2), alphaPriors = c(4, 4))
dp_1 <- Initialise(dp_1, numInitialClusters = 1)
dp_2 <- Initialise(dp_2, numInitialClusters = 10)
n_iter <- 2000
dp_1 <- Fit(dp, n_iter)
|
| | 0%
|
| | 1%
|
|- | 1%
|
|- | 2%
|
|- | 3%
|
|-- | 3%
|
|-- | 4%
|
|-- | 5%
|
|--- | 5%
|
|--- | 6%
|
|--- | 7%
|
|---- | 7%
|
|---- | 8%
|
|---- | 9%
|
|----- | 9%
|
|----- | 10%
|
|----- | 11%
|
|------ | 11%
|
|------ | 12%